# ======================
# Section 0: setup
# =====================
# Remove every (non-hidden) object from the global environment so the script
# starts from an empty workspace. Anything defined before this line -- helper
# functions, path variables -- is no longer available afterwards.
rm(list=ls())
library(data.table)
library(dplyr)
library(dtplyr)
library(ggplot2)
library(Hmisc)
library(stringr)
library(stargazer)
library(locfit)
library(RColorBrewer)
library(grid)
library(gtable)
library(sandwich)
library(plm)
library(lmtest)
library(parallel)
library(multiwayvcov)
library(lfe)
library(haven)
library(R.utils)
library(broom)

# Glue the arguments together with no separator and parse the resulting text
# into an (unevaluated) R expression.
comb <- function(...) {
  code_text <- paste(..., sep = "")
  parse(text = code_text)
}
# grep() wrapper that returns the matching elements themselves instead of
# their indices. All arguments are forwarded to grep() unchanged.
# Uses the literal TRUE rather than the reassignable alias `T`.
grepv <- function(...) grep(..., value = TRUE)
# Return the column names of `dataset` that match the regular expression
# `string`.
#
# string  : character pattern passed to grep().
# dataset : a data.frame (or subclass such as data.table).
#
# Stops with an error when `dataset` is not a data.frame or `string` is not a
# character vector. Type checks use inherits()/is.character() so that objects
# carrying more than one class do not produce a length > 1 `if` condition.
grepn <- function(string, dataset) {
  if (!inherits(dataset, "data.frame")) stop("Error: second argument must be a dataframe")
  if (!is.character(string)) stop("Error: first argument is a character")
  grep(string, names(dataset), value = TRUE)
}

# Build the text of a filter expression requiring every variable named in `x`
# to be non-missing, e.g. c("a", "b") -> "!is.na( a ) & !is.na( b )".
#
# x : vector of variable names (coerced to character by paste()).
#
# Returns a single string. For empty input the result is "TRUE" (an empty
# conjunction imposes no restriction); the previous index loop ran over
# 1:0 in that case and produced a condition on a non-existent name.
vecToNA <- function(x) {
  if (length(x) == 0) {
    return("TRUE")
  }
  paste("!is.na(", x, ")", collapse = " & ")
}

# Threshold compared against `judge_n` when building the sample waterfall.
judge_n_cutoff <- 100

# Read the prepared case file; "." and "NA" are both treated as missing.
data <- fread(
  "build/Data_ready_with_instruments.csv",
  na.strings = c(".", "NA")
)

# Cluster identifiers: judge x year and first-main-judge x year, pasted
# together without a separator.
data[, `:=`(
  judgeyear     = paste0(judge, year),
  judgemainyear = paste0(first_judge_main, year)
)]

# Number of analysis-sample rows per landlord (left NA outside that sample).
data[analysis_sample == TRUE, landlord_n := .N, by = Landlord]

# Store district as a factor.
data[, district := factor(district)]


# ======================================================================
#
# APPENDIX TABLE C.1
#
# WATERFALL DIAGRAM BY # OF CASES
# ======================================================================

# =============== Waterfall diagram by # of cases =======================
d <- data
#r1 <- d[, .("Sample" = "Full",.N,"Cases" = length(unique(case_number)), "Judges" = length(unique(judge)))]

eviction_types <- c("FORCIBLE ENTRY AND DETAINER", "JOINT ACTION")

# One summary row for a sample stage: row count, distinct case numbers and
# distinct judges among the rows flagged TRUE in `keep_rows` (NA counts as
# not selected, as with any logical `i` in data.table).
waterfall_row <- function(keep_rows, stage_label) {
  d[keep_rows,
    .("Sample" = stage_label,
      .N,
      "Cases" = length(unique(case_number)),
      "Judges" = length(unique(judge)))]
}

# Each stage adds restrictions on top of the previous stage's mask.
stage_case_type <- d[, case_type %in% eviction_types]
stage_residential <- stage_case_type &
  d[, defendant.business.guess == FALSE & is_condo == FALSE]
stage_named <- stage_residential &
  d[, !is.na(First_Name) & !is.na(Last_Name)]
stage_damages <- stage_named & d[, ad_damnum < 100000]
stage_judge <- stage_damages & d[, !is.na(judge) & judge != ""]
# This stage also drops bulk filings in addition to applying the judge cutoff.
stage_judge_n <- stage_judge & d[, bulk == FALSE & judge_n > judge_n_cutoff]
stage_courtroom <- stage_judge_n & d[, valid_courtroom == TRUE]

waterfall <- rbind(
  waterfall_row(stage_case_type, "Full"),
  waterfall_row(stage_residential, "No businesses or condos"),
  waterfall_row(stage_named, "Non-missing names"),
  waterfall_row(stage_damages, "Damages < $100,000"),
  waterfall_row(stage_judge, "Non-missing judge"),
  waterfall_row(stage_judge_n, "Judge sees more than 100 cases per year"),
  waterfall_row(stage_courtroom, "Valid Courtrooms")
)
setnames(waterfall, "N", "Named Individuals")
waterfall

# version without number of observations
waterfall[, `Named Individuals` := NULL]
# NOTE(review): get_tex_tabular() is not defined in this file -- confirm where
# it is expected to come from.
get_tex_tabular(file_name = "./Outputs/Supplementary data_Table_C1.tex",
                waterfall, summary = FALSE, type = "latex", rownames = FALSE,
                notabular = TRUE)



#===============================================================================
# ADDITIONAL CLEANING

# Claimed damages: coerce to numeric and set anything missing after the
# coercion to zero.
data[, ad_damnum := {
  damages <- as.numeric(ad_damnum)
  damages[is.na(damages)] <- 0
  damages
}]

# Make case type consistent with claimed damages: zero damages means
# "FORCIBLE ENTRY AND DETAINER", positive damages means "JOINT ACTION".
# The two recodes touch disjoint sets of rows, so their order is immaterial.
data[case_type == "JOINT ACTION" & ad_damnum == 0,
     case_type := "FORCIBLE ENTRY AND DETAINER"]
data[case_type == "FORCIBLE ENTRY AND DETAINER" & ad_damnum > 0,
     case_type := "JOINT ACTION"]


# replace judge with random_judge (drop first, then re-create the column)
data[, judge := NULL]
data[, judge := random_judge] # first_judge_main]

# Flag rows whose plaintiff text contains " CONDO".
data[, is_condo_old := grepl(" CONDO", plaintiffs)]

# Keep the existing district as district_ris and overwrite district with the
# case-number-based guess.
# NOTE(review): district was a factor before this point; whether it still is
# depends on the type of district_guess_based_on_cn -- confirm, since later
# models use `factor(year) * district`.
data[, `:=`(
  district_ris = district,
  district     = district_guess_based_on_cn
)]



#==================================================================================
#
# APPENDIX TABLE G.6 
#
#  Evaluating alternative stringency measures (CORRELATIONS)
#==================================================================================

# various forms of judge leniency
# Correlation matrix of the four stringency measures on the IV sample, using
# only rows where all four are observed.
stringency_measures <- c("Stringency_loo", "Stringency_conti",
                         "Stringency_amount", "Stringency_stays")
cortab <- cor(data[iv_sample == TRUE, stringency_measures, with = FALSE],
              use = "complete.obs")
round(cortab, 3)

# Human-readable row and column labels for the exported table.
dimnames(cortab) <- list(
  c("Stringency (eviction order)", "Stringency (continuance)",
    "Stringency (judgment amount)", "Stringency (stays)"),
  c("Eviction Order", "Continuance", "Amount", "Stays")
)


# NOTE(review): get_tex_tabular() is not defined in this file -- confirm where
# it is expected to come from.
get_tex_tabular(file_name = "./Outputs/Supplementary data_Table_G6.tex",
                cortab, summary = FALSE, type = "latex", rownames = TRUE,
                digits = 3, notabular = TRUE)




# ================================================================================
#
# APPENDIX TABLE G.9
#
# Stringency on judgment amount
# ================================================================================

# Row counts and share of missing judgment amounts by case type.
data[, .(.N, mean(is.na(judgment_amount))), by = case_type][order(N)]

# Regressions of (log) judgment amount on leave-one-out stringency with
# year x district fixed effects. Only r2, r3 and r4 go into the exported
# table; r1 and r5-r10 are fitted but not reported here.
# NOTE(review): r2 is estimated on iv_sample while r3, r4 and r9 use
# analysis_sample -- confirm the sample difference is intended.
r1 <- lm(log(judgment_amount) ~ Stringency_loo + as.factor(year)*as.factor(district),
         data = data[iv_sample == TRUE & case_type == "JOINT ACTION"])

r2 <- lm(log(judgment_amount) ~ Stringency_loo + as.factor(year)*as.factor(district),
         data = data[iv_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE])
r3 <- lm(log(judgment_amount) ~ Stringency_loo + log(ad_damnum + 1) +
           as.factor(year)*as.factor(district),
         data = data[analysis_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE])
r4 <- lm(log(judgment_amount) ~ Stringency_loo + log(ad_damnum + 1) + I(median_rent/1000) +
           pct_black + pct_hisp + gender + pred.bla + pred.whi +
           pred.his + pred.asi + no_attorney + as.factor(year)*as.factor(district),
         data = data[analysis_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE])

r5 <- lm(log(amount + 1) ~ Stringency_loo + as.factor(year)*as.factor(district),
         data = data[iv_sample == TRUE & case_type == "JOINT ACTION"])
r6 <- lm(log(amount + 1) ~ Stringency_loo + as.factor(year)*as.factor(district),
         data = data[iv_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE])
r7 <- lm(log(amount + 1) ~ Stringency_loo + log(ad_damnum + 1) +
           as.factor(year)*as.factor(district),
         data = data[iv_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE])
r8 <- lm(log(amount + 1) ~ Stringency_loo + log(ad_damnum + 1) + pct_black + pct_hisp +
           gender + pred.bla + pred.whi +
           pred.his + pred.asi + no_attorney + as.factor(year)*as.factor(district),
         data = data[iv_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE])

r9 <- lm(amount_diff ~ Stringency_loo + ad_damnum + as.factor(year)*as.factor(district),
         data = data[analysis_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE & amount < 24000])
r10 <- lm(amount_diff ~ Stringency_loo + ad_damnum + as.factor(year)*as.factor(district),
          data = data[iv_sample == TRUE & case_type == "JOINT ACTION" & evicted == TRUE & amount < 24000])

# Standard errors clustered by judge x year for the reported models.
regs <- list(r2, r3, r4)
SEs <- lapply(regs, function(x) coeftest(x, vcov = cluster.vcov(x, ~ judgeyear))[, 2])

# Console preview, then the LaTeX file. The output path is relative
# ("./Outputs/...") like every other table in this script; the previous
# "/Outputs/..." pointed at the filesystem root.
stargazer(regs, omit.stat = c("ser", "f"), se = SEs,
          keep = c("*Strin*", "*damnum*", "Const*"), type = "text")
stargazer(regs, omit.stat = c("ser", "f"), se = SEs,
          keep = c("*Strin*", "*damnum*", "Const*"),
          out = "./Outputs/Supplementary data_Table_G9.tex")



# ==================================================
# 
# APPENDIX TABLE G.11
#
# Monotonicity check 
# ==================================================


# First-stage style regressions of eviction on judge stringency, estimated on
# the IV sample (tenants with gender "either" excluded) and on sub-samples.
# Stringency is the first regressor, so its coefficient sits in position 2.
# The lm() calls are kept inline at top level because cluster.vcov() below
# works from the fitted model's call.
reg4 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
             pct_black + pct_hisp + gender + pred.bla + pred.whi + pred.his +
             pred.asi + no_attorney + factor(year) * district,
           data = data[iv_sample == TRUE & gender != "either"])

# Main sample
r1 <- reg4
# By case type (case_type dropped from the controls)
r2 <- lm(evicted ~ Stringency + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + gender + pred.bla + pred.whi + pred.his +
           pred.asi + no_attorney + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & case_type == "JOINT ACTION"])
r3 <- lm(evicted ~ Stringency + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + gender + pred.bla + pred.whi + pred.his +
           pred.asi + no_attorney + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & case_type == "FORCIBLE ENTRY AND DETAINER"])
# By tenant gender (gender dropped from the controls)
r4 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + pred.bla + pred.whi + pred.his +
           pred.asi + no_attorney + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & gender == "male"])
r5 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + pred.bla + pred.whi + pred.his +
           pred.asi + no_attorney + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & gender == "female"])
# By attorney status (no_attorney dropped from the controls)
r6 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + gender + pred.bla + pred.whi + pred.his +
           pred.asi + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & no_attorney == TRUE])
r7 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + gender + pred.bla + pred.whi + pred.his +
           pred.asi + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & no_attorney == FALSE])
# By predicted race (pred.* dropped from the controls)
r8 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + gender + no_attorney + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & pred.bla > .75])
r9 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
           pct_black + pct_hisp + gender + no_attorney + factor(year) * district,
         data = data[iv_sample == TRUE & gender != "either" & pred.his > .75])
# By landlord size
r10 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
            pct_black + pct_hisp + gender + no_attorney + factor(year) * district,
          data = data[iv_sample == TRUE & gender != "either" & landlord_n > 5])
r11 <- lm(evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
            pct_black + pct_hisp + gender + no_attorney + factor(year) * district,
          data = data[iv_sample == TRUE & gender != "either" & landlord_n <= 5])
regs <- list(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11)

sample <- c("Main", "Joint Action", "Forcible Entry and Detainer", "Males", "Females",
            "No attorney", "Attorney",
            "Black", "Hispanic", "Larger landlords", "Smaller landlords")

# Coefficient tests with judge x year clustered variance, computed once per
# model; row 2 is the Stringency row.
clustered_tests <- lapply(regs, function(fit) {
  coeftest(fit, vcov = cluster.vcov(fit, ~ judgeyear))
})
coefs <- vapply(regs, function(fit) coef(fit)[[2]], numeric(1))
ses <- vapply(clustered_tests, function(ct) ct[2, 2], numeric(1))
pvals <- round(vapply(clustered_tests, function(ct) ct[2, 4], numeric(1)), 4)
nobs <- vapply(regs, function(fit) length(residuals(fit)), integer(1))

mono_check <- data.table("Sample" = sample,
                         "Coefficient" = coefs,
                         "Standard Errors" = ses,
                         "P-Value" = pvals,
                         "Observations" = nobs)

# Round every numeric column to three decimals before export.
mono_check[, 2:5] <- round(mono_check[, -1], 3)
# NOTE(review): get_tex_tabular() is not defined in this file -- confirm where
# it is expected to come from.
get_tex_tabular(file_name = "./Outputs/Supplementary data_Table_G11.tex",
                mono_check, summary = FALSE, type = "latex", rownames = FALSE,
                notabular = TRUE, digits = 3,
                col_nums = c(2, 3, 4), tot_cols = 5, digs = 3)



#====================================================================
# 
# APPENDIX TABLE G.14 
#
#table about characteristics break down given available judge data
#====================================================================

# Row labels of the judge-characteristics table.
sample <- c("Number of Judges", "Number of Total Cases",
            "Stringency Percentage Point Difference Between 90th and 10th Percentiles")

# Per judge-race and per judge-gender group: distinct judges, distinct case
# numbers, and the 90th-minus-10th percentile gap in Stringency.
race_num <- data[, length(unique(judge)), by = "judge_attr_race"]
race_case_num <- data[, length(unique(case_number)), by = "judge_attr_race"]
race_ninety_tenth_quantile_diff <- data[
  , quantile(Stringency, .90, na.rm = TRUE) - quantile(Stringency, .10, na.rm = TRUE),
  by = "judge_attr_race"]
gender_num <- data[, length(unique(judge)), by = "judge_attr_gender"]
gender_case_num <- data[, length(unique(case_number)), by = "judge_attr_gender"]
gender_ninety_tenth_quantile_diff <- data[
  , quantile(Stringency, .90, na.rm = TRUE) - quantile(Stringency, .10, na.rm = TRUE),
  by = "judge_attr_gender"]

# Stack the three statistics for one attribute value into a table column.
judge_attr_column <- function(n_judges, n_cases, spread, attr_col, attr_value) {
  pick <- function(tbl) tbl[tbl[[attr_col]] == attr_value, V1]
  c(pick(n_judges), pick(n_cases), pick(spread))
}

# Codes as used below: gender 1 = male, 0 = female; race 1 = white,
# 0 = black, 2 = hispanic.
judge_attr_table <- data.table(
  "Sample" = sample,
  "Male" = judge_attr_column(gender_num, gender_case_num,
                             gender_ninety_tenth_quantile_diff, "judge_attr_gender", 1),
  "Female" = judge_attr_column(gender_num, gender_case_num,
                               gender_ninety_tenth_quantile_diff, "judge_attr_gender", 0),
  "White" = judge_attr_column(race_num, race_case_num,
                              race_ninety_tenth_quantile_diff, "judge_attr_race", 1),
  "Black" = judge_attr_column(race_num, race_case_num,
                              race_ninety_tenth_quantile_diff, "judge_attr_race", 0),
  "Hispanic" = judge_attr_column(race_num, race_case_num,
                                 race_ninety_tenth_quantile_diff, "judge_attr_race", 2))

library(xtable)
tab2 <- xtable(judge_attr_table, caption = "Judge Characteristics Breakdown", digits = 3)

# `dirstring` is not defined anywhere in this script (and the workspace is
# cleared at the top), so only prepend it when it actually exists; otherwise
# write relative to the working directory like the other tables.
g14_file <- "./Outputs/Supplementary data_Table_G14.tex"
if (exists("dirstring")) {
  g14_file <- paste0(dirstring, g14_file)
}
print(tab2, file = g14_file, include.rownames = FALSE, booktabs = TRUE)



#====================================================================
# 
# APPENDIX TABLE G.15 
#
#now applying basic regression interacted with gender/race judge attributes
#====================================================================

library(xtable)

sample <- c("All", "Male", "Female", "White", "Black", "Hispanic", "Other")

# `dirstring` is not defined anywhere in this script (and the workspace is
# cleared at the top); fall back to the working directory when it is absent.
mono_base_dir <- if (exists("dirstring")) dirstring else "."

# Run the seven pro-se-tenant monotonicity regressions for cases heard by
# judges with data[[attr_col]] == attr_value, print the summary table, write
# it to <base dir>/Build/<judge_label>_judge_mono_replication.tex and return
# the Stringency coefficients (one per tenant group, in `tenant_groups` order).
#
# attr_col      : name of the judge attribute column in `data`.
# attr_value    : attribute code selecting the judges.
# judge_label   : label used in the caption and the file name.
# tenant_header : header of the first table column.
# tenant_groups : labels of the seven tenant sub-samples.
run_judge_mono_check <- function(attr_col, attr_value, judge_label,
                                 tenant_header, tenant_groups) {
  # IV sample, tenants without an attorney, gender known, chosen judge group.
  pro_se <- data[iv_sample == TRUE & gender != "either" & no_attorney == TRUE]
  pro_se <- pro_se[pro_se[[attr_col]] == attr_value]

  # Stringency is always the first regressor, i.e. coefficient number 2.
  f_all <- evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
    pct_black + pct_hisp + gender + pred.bla + pred.whi + pred.his + pred.asi +
    no_attorney + factor(year) * district
  f_by_gender <- evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
    pct_black + pct_hisp + pred.bla + pred.whi + pred.his + pred.asi +
    factor(year) * district
  f_by_race <- evicted ~ Stringency + case_type + ad_damnum + I(median_rent/1000) +
    pct_black + pct_hisp + gender + factor(year) * district

  fits <- list(
    lm(f_all, data = pro_se),
    lm(f_by_gender, data = pro_se[gender == "male"]),
    lm(f_by_gender, data = pro_se[gender == "female"]),
    lm(f_by_race, data = pro_se[pred.whi > .75]),
    lm(f_by_race, data = pro_se[pred.bla > .75]),
    lm(f_by_race, data = pro_se[pred.his > .75]),
    lm(f_by_race, data = pro_se[pred.oth > .75])
  )

  stringency_coefs <- vapply(fits, function(fit) coef(fit)[[2]], numeric(1))
  # Plain numeric p-values (row 2, column 4 of the coefficient matrix) so the
  # table column is an ordinary numeric vector rather than a list of tibbles.
  stringency_pvals <- vapply(fits, function(fit) coef(summary(fit))[2, 4], numeric(1))
  n_obs <- vapply(fits, function(fit) length(residuals(fit)), integer(1))

  check_table <- data.table("Tenants" = tenant_groups,
                            "Coefficient" = stringency_coefs,
                            "P-Value" = stringency_pvals,
                            "Observations" = n_obs)
  setnames(check_table, "Tenants", tenant_header)

  # stargazer has no `round` argument; `digits` is the rounding control.
  stargazer(check_table, summary = FALSE, digits = 3)
  tex_table <- xtable(check_table,
                      caption = paste0("Monotonicity Checks: ", judge_label, " Judge"),
                      digits = 3)
  print(tex_table,
        file = paste0(mono_base_dir, "/Build/", judge_label, "_judge_mono_replication.tex"),
        include.rownames = FALSE, booktabs = TRUE)

  stringency_coefs
}

# Judge gender codes: 0 = female, 1 = male.
female_coefs <- run_judge_mono_check("judge_attr_gender", 0, "female", "Pro_Se Tenants", sample)
male_coefs <- run_judge_mono_check("judge_attr_gender", 1, "male", "Pro_Se Tenants", sample)

# Judge race codes: 0 = black, 1 = white, 2 = hispanic.
black_coefs <- run_judge_mono_check("judge_attr_race", 0, "black", "Pro_Se_Tenants", sample)
white_coefs <- run_judge_mono_check("judge_attr_race", 1, "white", "Pro_Se_Tenants", sample)
hispanic_coefs <- run_judge_mono_check("judge_attr_race", 2, "hispanic", "Pro_Se_Tenants", sample)

# making a single table of coefficients for each of the above interacted terms
# (the "Other" tenant group and Hispanic judges are not reported here)

sample <- c("All", "Male", "Female", "White", "Black", "Hispanic")
mono_check <- data.table("Pro Se Tenants" = sample,
                         "Male Judges" = male_coefs[1:6],
                         "Female Judges" = female_coefs[1:6],
                         "White Judges" = white_coefs[1:6],
                         "Black Judges" = black_coefs[1:6])

stargazer(mono_check, summary = FALSE, digits = 3)
tab1 <- xtable(mono_check, caption = "Monotonicity Checks, Coefficient of Stringency", digits = 3)
g15_file <- "./Outputs/Supplementary data_Table_G15.tex"
if (exists("dirstring")) {
  g15_file <- paste0(dirstring, g15_file)
}
print(tab1, file = g15_file, include.rownames = FALSE, booktabs = TRUE)


